{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "12be10c7",
   "metadata": {},
   "source": [
    "### GAS7-MYH1 Fusion Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f1467221",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "10971665",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Project working directory (absolute path); every input file below is resolved against it\n",
    "wkdir = \"/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3\"\n",
    "wkdir_path = Path(wkdir)\n",
    "\n",
    "# Tab-separated variant feature table\n",
    "# NOTE(review): misexp_vrnt_feat_df is not referenced by any later cell in this notebook\n",
    "misexp_vrnt_feat_path = wkdir_path / \"6_misexp_dissect/vrnt_features/misexp_vrnt_features.tsv\"\n",
    "misexp_vrnt_feat_df = pd.read_csv(misexp_vrnt_feat_path, sep=\"\\t\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "5ee8006a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# GAS7-MYH1 fusion example: variant and RNA sample identifiers\n",
    "vrnt_id = \"402648\"\n",
    "rna_id = \"INT_RNA7961048\"  # used to pick the per-sample STAR-Fusion results directory\n",
    "\n",
    "# Structural-variant start and end coordinates\n",
    "sv_start, sv_end = 10078018, 10512685"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1a4da42a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fusion predictions with coding-effect annotation for the selected RNA sample\n",
    "# NOTE(review): the file name is STAR-Fusion coding-effect output; confirm whether FusionInspector results were intended\n",
    "star_fusion_results_path = wkdir_path / f\"6_misexp_dissect/star_fusion/results_stringent_qc/{rna_id}/star-fusion.fusion_predictions.abridged.coding_effect.tsv\"\n",
    "star_fusion_results_df = pd.read_csv(star_fusion_results_path, sep=\"\\t\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "d4624522",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the prediction rows whose fusion name matches the fusion of interest\n",
    "# NOTE(review): the notebook title says GAS7-MYH1 but the name matched here is AC005747.1--MYH1 - confirm this is the intended record\n",
    "fusion_name = \"AC005747.1--MYH1\"\n",
    "is_target_fusion = star_fusion_results_df[\"#FusionName\"].eq(fusion_name)\n",
    "myh1_fusion_results_df = star_fusion_results_df.loc[is_target_fusion]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "6ff1806f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of amino acids in new protein: 1664\n"
     ]
    }
   ],
   "source": [
    "# Translated sequence of the fusion; .item() raises unless exactly one row matched the fusion name\n",
    "gas7_myh1_aa_seq = myh1_fusion_results_df[\"FUSION_TRANSL\"].item()\n",
    "\n",
    "# Drop the trailing stop symbol only when one is present: a final character that is\n",
    "# not a letter is treated as the stop symbol, so a translation without a terminal\n",
    "# stop does not silently lose a real residue.\n",
    "has_stop_symbol = not gas7_myh1_aa_seq[-1:].isalpha()\n",
    "gas7_myh1_aa_seq_no_stop = gas7_myh1_aa_seq[:-1] if has_stop_symbol else gas7_myh1_aa_seq\n",
    "total_residues = len(gas7_myh1_aa_seq_no_stop)\n",
    "print(f\"Number of amino acids in new protein: {total_residues}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "4e9b5498",
   "metadata": {},
   "outputs": [],
   "source": [
    "# CDS range strings of the left and right fusion partners, used below to count\n",
    "# the residues each partner contributes; .item() requires exactly one matching row\n",
    "cds_left_range, cds_right_range = (\n",
    "    myh1_fusion_results_df[column].item()\n",
    "    for column in (\"CDS_LEFT_RANGE\", \"CDS_RIGHT_RANGE\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "edf1bef2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each range is a \"start-end\" string; split it and convert both ends to integers\n",
    "cds_left_start, cds_left_end = map(int, cds_left_range.split(\"-\"))\n",
    "cds_right_start, cds_right_end = map(int, cds_right_range.split(\"-\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a3f8e5fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# CDS ranges are treated as inclusive, so each span covers end - start + 1 nucleotides\n",
    "cds_left_nt = cds_left_end - cds_left_start + 1\n",
    "cds_right_nt = cds_right_end - cds_right_start + 1\n",
    "\n",
    "# Count whole codons with integer arithmetic: true division returned floats and\n",
    "# turned the final consistency check into an exact float comparison.\n",
    "assert cds_left_nt % 3 == 0, \"left CDS span is not a whole number of codons\"\n",
    "assert cds_right_nt % 3 == 0, \"right CDS span is not a whole number of codons\"\n",
    "residues_left = cds_left_nt // 3  # start codon is kept\n",
    "residues_right = cds_right_nt // 3 - 1  # stop codon is removed\n",
    "print(f\"Total residues left: {residues_left}\")\n",
    "print(f\"Total residues right: {residues_right}\")\n",
    "\n",
    "# The CDS-derived count must equal the length of the translated sequence without its stop symbol\n",
    "assert residues_left + residues_right == total_residues, (\n",
    "    f\"CDS ranges give {residues_left + residues_right} residues, translation has {total_residues}\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9dfb0542",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
